Data munging

First, load the table of MPC add-ons and identify those most affected by the fix.

## Load the table of MPC add-ons scraped into addons-mpc.csv.
addonsDT <- fread("addons-mpc.csv")
## Replace the string "None" with NA in every column.
## Use set() instead of the non-idiomatic `eval(col) :=` inside `[` — set()
## is the documented data.table idiom for assignment by computed column name.
for (col in names(addonsDT)) {
    set(addonsDT, which(addonsDT[[col]] == "None"), col, NA)
}
## Convert boolean columns to logical (loaded as character because of "None"s)
## via numeric (since "TRUE/FALSE" strings were loaded as "1"/"0").
## Use .SD/.SDcols rather than lapply over get() — same result, idiomatic form.
bool_cols <- c("node_modules", "packages", "package_json")
addonsDT[, (bool_cols) := lapply(.SD, function(x) as.logical(as.numeric(x))),
    .SDcols = bool_cols]
## Add-ons affected by the fix have package_json TRUE and large numbers of
## JS files (the js_files count is carried along for later categorization).
affectedAddonsDT <- addonsDT[package_json == TRUE,
    list(guid = id, users, js_files,
        ## Short name: text between the last "/" and the first "-" of the URL.
        name = sub("^([^/]*/)+([^-]+)(.*)$", "\\2", download_url))]
save(addonsDT, affectedAddonsDT, file = "addons-tables.RData")

Load the UT data collected in this notebook. It is split across 3 data tables:

  • main with one row per session containing scalar measures
  • addons with one row per add-on per session
  • hangs with one row per hang time (histogram bin) per session

The data consists of UT sessions from Nightly over two periods:

  • before: builds dated between 2016-10-09 and 2016-10-15
  • after: builds dated between 2016-10-24 and 2016-10-31

The after builds have the updated add-on SDK code.

datadir <- "addon-sdk-fix-data_2016-11-02"
## Helper: read one feather file from the data dir as a data.table.
load_feather_dt <- function(fname) {
    as.data.table(read_feather(file.path(datadir, fname)))
}
dt_main <- load_feather_dt("main.feather")
dt_addons <- load_feather_dt("addons.feather")
dt_hangs <- load_feather_dt("hangs.feather")

## Reformat some columns for convenience.
## Build date is the leading YYYYMMDD portion of the build ID.
dt_main[, build_date := as.Date(substr(build_id, 1, 8), format = "%Y%m%d")]
## Ignore versions for Mac and Linux (only Windows versions are analyzed).
dt_main[sys_os != "Windows_NT", sys_os_version := NA]

## Identify add-ons of interest.

## Categorize add-ons according to the number of JS files:
## 50+ (heavy), 20-50 (medium), all (affected).
## Every row of affectedAddonsDT gets affected = TRUE; the keyed join below
## then propagates these flags onto the per-session add-on table.
affectedAddonsDT[, heavy_js := js_files >= 50][,
    medium_js := js_files >= 20 & js_files < 50][,
    affected := TRUE]
## Key both tables on guid for the merge.
setkey(affectedAddonsDT, guid)
setkey(dt_addons, guid)
dt_addons_cols <- names(dt_addons)
affected_addons_cols <- c("heavy_js", "medium_js", "affected")
## Right join: keep every row of dt_addons, attaching the flag columns where
## the guid matches (NA otherwise); select original columns plus the flags.
dt_addons <- affectedAddonsDT[dt_addons,
    c(dt_addons_cols, affected_addons_cols), with = FALSE]
## Add-ons with no match in affectedAddonsDT are not affected: NA -> FALSE.
for(acol in affected_addons_cols)
    dt_addons[is.na(get(acol)), eval(acol) := FALSE]
## Identify VDH 6.1.1 separately.
## NOTE(review): the prefix test also matches versions like "6.1.10" —
## confirm that only "6.1.1" exists or that prefix matching is intended.
dt_addons[, is_vdh := (guid == "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}" &
    substr(version, 1, 5) == "6.1.1")]

## Summarize each session's add-ons according to whether they had affected
## add-ons.
session_addons <- dt_addons[, list(has_vdh = any(is_vdh),
    num_addons_affected = sum(affected),
    num_addons_medium_js = sum(medium_js),
    num_addons_heavy_js = sum(heavy_js)),
    keyby = session_id]
setkey(dt_main, session_id)
## Right join onto dt_main: sessions with no add-on rows get NA summaries.
dt_main <- session_addons[dt_main]
## Impute those NAs: no add-on rows means no VDH and zero affected add-ons.
dt_main[is.na(has_vdh), has_vdh := FALSE]
for(acol in grep("^num_addons", names(session_addons), value = TRUE))
    dt_main[is.na(get(acol)), eval(acol) := 0]

## dt_addons_cols is no longer needed (and would match the save pattern).
rm(dt_addons_cols)
## Persist every dt_* working table for later reuse.
save(list = ls(pattern = "^dt_"), file = "addon-sdk-working-data.RData")

What does this data look like?

## Peek at the session-level table: one row per session with the add-on
## summary columns joined on.
head(dt_main)
##    session_id has_vdh num_addons_affected num_addons_medium_js
## 1:          0   FALSE                   0                    0
## 2:          1   FALSE                   0                    0
## 3:          2   FALSE                   0                    0
## 4:          3   FALSE                   0                    0
## 5:          4   FALSE                   0                    0
## 6:          5   FALSE                   0                    0
##    num_addons_heavy_js client_id addons_sys_num       build_id e10s
## 1:                   0        38              5 20161010030204 TRUE
## 2:                   0        38              5 20161010030204 TRUE
## 3:                   0        38              5 20161009030202 TRUE
## 4:                   0        38              5 20161009030202 TRUE
## 5:                   0        38              6 20161029062601 TRUE
## 6:                   0        38              5 20161015030203 TRUE
##    has_hangs num_addons_nonsys period shutdown startup_AMIend
## 1:      TRUE                 2 before      914           2182
## 2:      TRUE                 2 before     5890           3063
## 3:     FALSE                 2 before     1096            355
## 4:      TRUE                 2 before    43626           2540
## 5:     FALSE                 2  after      719           2112
## 6:      TRUE                 2 before    10042           3810
##    startup_AMIstart startup_XPIstart startup_firstpaint startup_main
## 1:             1933             1982               3839         1823
## 2:             2135             2717               4811         1649
## 3:              150              197               1007           68
## 4:             2070             2279               3776         1674
## 5:             1633             1975               2591         1406
## 6:             3052             3457               5710         1692
##    startup_sessionrestored startup_toplevelwindow sys_arch sys_cpu_count
## 1:                    4960                   2254   x86-64             8
## 2:                    5271                   3858   x86-64             8
## 3:                    1212                    629   x86-64             8
## 4:                    3929                   3155   x86-64             8
## 5:                    3468                   2161   x86-64             8
## 6:                    6439                   4309   x86-64             8
##    sys_mem     sys_os sys_os_version was_startup_interrupted constant_e10s
## 1:    8140 Windows_NT            6.1                   FALSE          TRUE
## 2:    8140 Windows_NT            6.1                   FALSE          TRUE
## 3:    8140 Windows_NT            6.1                   FALSE          TRUE
## 4:    8140 Windows_NT            6.1                   FALSE          TRUE
## 5:    8140 Windows_NT            6.1                   FALSE          TRUE
## 6:    8140 Windows_NT            6.1                   FALSE          TRUE
##    both_periods constant_addons constant_addons_guid build_date
## 1:         TRUE           FALSE                 TRUE 2016-10-10
## 2:         TRUE           FALSE                 TRUE 2016-10-10
## 3:         TRUE           FALSE                 TRUE 2016-10-09
## 4:         TRUE           FALSE                 TRUE 2016-10-09
## 5:         TRUE           FALSE                 TRUE 2016-10-29
## 6:         TRUE           FALSE                 TRUE 2016-10-15
## Peek at the per-session add-on table (one row per add-on per session).
head(dt_addons)
##    client_id
## 1:     42306
## 2:     42306
## 3:     42306
## 4:     42306
## 5:     42306
## 6:     42306
##                                                                             guid
## 1: 00cf4073-9c0d-4c73-823c-9627a9ebda10@5ce0c315-7a90-4c46-8428-5c0df674cab0.com
## 2: 00cf4073-9c0d-4c73-823c-9627a9ebda10@5ce0c315-7a90-4c46-8428-5c0df674cab0.com
## 3: 00cf4073-9c0d-4c73-823c-9627a9ebda10@5ce0c315-7a90-4c46-8428-5c0df674cab0.com
## 4: 00cf4073-9c0d-4c73-823c-9627a9ebda10@5ce0c315-7a90-4c46-8428-5c0df674cab0.com
## 5: 00cf4073-9c0d-4c73-823c-9627a9ebda10@5ce0c315-7a90-4c46-8428-5c0df674cab0.com
## 6: 00cf4073-9c0d-4c73-823c-9627a9ebda10@5ce0c315-7a90-4c46-8428-5c0df674cab0.com
##    session_id  version heavy_js medium_js affected is_vdh
## 1:     680843 0.95.181    FALSE     FALSE    FALSE  FALSE
## 2:     680844 0.95.181    FALSE     FALSE    FALSE  FALSE
## 3:     680845 0.95.181    FALSE     FALSE    FALSE  FALSE
## 4:     680846 0.95.181    FALSE     FALSE    FALSE  FALSE
## 5:     680847 0.95.181    FALSE     FALSE    FALSE  FALSE
## 6:     680848 0.95.181    FALSE     FALSE    FALSE  FALSE
## Peek at the hangs table: one row per hang-time histogram bin per session.
head(dt_hangs)
##    client_id count hang_time num_hang_stats session_id
## 1:        38     1      1023              1          0
## 2:        38     1      1023              1          1
## 3:        38     1      1023              1          3
## 4:        38     1      1023              1          5
## 5:        38     1      1023              3          6
## 6:        38     2       255              3          6

Longitudinal histories

Ideally, we want to focus on profiles that were active both before and after the change, so that we can compare the paired before/after differences across profiles. To do this, we also require profiles to have a constant e10s setting and list of active non-system add-ons (ignoring add-on versions) across all their sessions.

## Keep only profiles active in both periods whose e10s setting and
## non-system add-on set (by guid) stayed constant across sessions.
dt_good <- dt_main[both_periods & constant_e10s & constant_addons_guid]
nprof_good <- dt_good[, uniqueN(client_id)]

We have 11,928 such profiles (20.3%) with a total of 303,243 (32.3%) sessions.

How many sessions have e10s enabled, and have (non-system) add-ons?

## Session counts broken down by e10s status and whether the session had any
## non-system add-ons. pctLabelText() and bigNum() are formatting helpers
## defined elsewhere in this notebook.
dt_good[, list(n_sessions = .N), by = list(e10s, has_addons = num_addons_nonsys > 0)][
    order(e10s, has_addons, decreasing = TRUE)][,
    pct_sessions := pctLabelText(n_sessions / nrow(dt_good))][,
    n_sessions := bigNum(n_sessions)][]
##     e10s has_addons n_sessions pct_sessions
## 1:  TRUE       TRUE    264,529        87.2%
## 2:  TRUE      FALSE     14,801         4.9%
## 3: FALSE       TRUE     23,545         7.8%
## 4: FALSE      FALSE        368         0.1%

Note that the vast majority of profiles have e10s enabled and have non-system active add-ons.

For those sessions that have add-ons, how many have add-ons most affected by the changes, and specifically VDH?

## For sessions that have add-ons, break down by type of affected add-ons.
## NOTE(review): pct_sessions uses nrow(dt_good) (all good sessions, not the
## add-on-only subset) as the denominator, which is why the percentages in
## the output sum to less than 100% — confirm this baseline is intended.
dt_good[num_addons_nonsys > 0, list(n_sessions = .N),
    by = list(has_addons_affected = num_addons_affected > 0, has_addons_medium_js = num_addons_medium_js > 0,
        has_addons_heavy_js = num_addons_heavy_js > 0, has_vdh)][
    order(has_addons_affected, has_addons_medium_js, has_addons_heavy_js, has_vdh)][,
    pct_sessions := pctLabelText(n_sessions / nrow(dt_good))][,
    n_sessions := bigNum(n_sessions)][]
##    has_addons_affected has_addons_medium_js has_addons_heavy_js has_vdh
## 1:               FALSE                FALSE               FALSE   FALSE
## 2:                TRUE                FALSE               FALSE   FALSE
## 3:                TRUE                FALSE                TRUE   FALSE
## 4:                TRUE                FALSE                TRUE    TRUE
## 5:                TRUE                 TRUE               FALSE   FALSE
## 6:                TRUE                 TRUE                TRUE   FALSE
## 7:                TRUE                 TRUE                TRUE    TRUE
##    n_sessions pct_sessions
## 1:    215,232        71.0%
## 2:     38,676        12.8%
## 3:      7,017         2.3%
## 4:     19,207         6.3%
## 5:      6,597         2.2%
## 6:        428         0.1%
## 7:        917         0.3%

Comparisons

We compare sessions before and after the fixes landed on the basis of hang times, for hangs that are related to sdk/addon/runner.js (have this script appearing in their stack trace). If a profile did have such hangs, they are recorded in a histogram split on [256, 512, …, 16384] (times in ms). Both a reduction in occurrence of hangs and a reduction in the hang times themselves are considered improvements.

Per-session hangs

As a first pass, compare the hang stats for individual sessions before and after. A session’s hang times are summarized by taking the log (base 2) of the values (histogram breaks), and taking the mean over all values observed in that session. Under this transformation, hang times range on a linear scale between 8 and 14. Separate out sessions with no hangs.

## Per-session mean of log2 hang times, weighted by per-bin hang counts
## (wtd.mean is Hmisc's weighted mean).
dt_hangs_mean <- dt_hangs[, list(hang_time = wtd.mean(log(hang_time + 1, 2), count),
    ## Total hang time over the session.
    hang_time_tot = sum(log(hang_time + 1, 2) * count)), keyby = session_id]
setkey(dt_good, session_id)
## Right join: sessions with no hang rows get NA hang summaries.
dt_good <- dt_hangs_mean[dt_good]
## Sessions with no hangs are counted as 1 (a convenience value below the
## 8-14 range of actual hangs; discussed in the per-client section).
dt_good[is.na(hang_time), hang_time := 1][is.na(hang_time_tot), hang_time_tot := 1]
## Drop sessions with interrupted startup.
dt_good <- dt_good[was_startup_interrupted == FALSE]
## Order the period labels for display.
dt_good[, period := factor(period, levels = c("before", "after"))]
## Add indicators for add-ons (three-level state plus e10s on/off).
dt_good[, addon_state := factor(ifelse(num_addons_nonsys > 0,
    ifelse(num_addons_affected > 0, "has affected addons", "has addons, no affected"), "no addons"),
    levels = c("has affected addons", "has addons, no affected", "no addons"))]
dt_good[, e10s_state := factor(ifelse(e10s, "e10s on", "e10s off"), levels = c("e10s on", "e10s off"))]

Are there differences in per-session mean hang times, across sessions with hangs? Break down by e10s status and addons present.

## Boxplots of per-session mean hang times, restricted to sessions that hung,
## faceted by e10s status (rows) and add-on state (columns).
ggplot(dt_good[has_hangs == TRUE],
        aes(x = period, y = hang_time)) +
    geom_boxplot() +
    facet_grid(e10s_state ~ addon_state) +
    labs(x = "Period",
        y = "Mean log2(hang time)",
        title = "Per-session mean hang (runner.js) times before and after the change\nfor sessions with hangs")

This already shows that mean hang times have clearly improved after the fixes landed.

Consider also total hang times over the session, which corresponds more closely to the user’s experience.

## Boxplots of total hang time per session (hang_time_tot is the sum of
## log2 hang times weighted by counts), plotted on a natural-log scale.
ggplot(dt_good[has_hangs == TRUE], aes(period, log(hang_time_tot))) +
    geom_boxplot() +
    facet_grid(e10s_state ~ addon_state) +
    labs(title = "Per-session total hang (runner.js) times before and after the change\nfor sessions with hangs",
        x = "Period",
        ## Label fixed: the plotted value is log() of the total, so the old
        ## label "Total log2(hang time)" described the untransformed total.
        y = "log(total log2 hang time)")

These distributions are highly skewed, as they depend, among other things, on the number of installed add-ons. However, there are signs of improvement overall.

Finally, compare the proportions of sessions that have hangs.

## Proportions of sessions WITH hangs (mean of the has_hangs indicator) by
## e10s status, add-on state and period. (The previous comment said
## "without hangs", but mean(has_hangs) is the fraction with hangs.)
ggplot(dt_good[, list(prop_hangs = mean(has_hangs)), by = list(e10s_state, addon_state, period)],
        aes(period, prop_hangs)) +
    geom_bar(stat = "identity") + 
    facet_grid(e10s_state ~ addon_state) +
    scale_y_continuous(breaks = interval.breaks(0.2), labels = pct.labels,
        limits = c(0,1)) +
    labs(title = "Percentage of sessions with hangs (runner.js)",
        x = "Period",
        y = "Percent of sessions")

Here again are signs of improvement. This also shows that affected add-ons are much more likely to cause hangs than other add-ons, as expected.

Per-client hangs

We now compare the change from before to after on a per-client basis. A client’s hangs in each period are summarized by taking the mean of the per-session means, as described above. For this purpose, sessions with no hangs are counted as 1. We look at both the change in mean hang time for profiles that had hangs either before or after, as well as the change in the proportion of sessions with hangs.

## Many columns should have the same value across all sessions for a given client.
## Just keep a single client-level value for these.
per_client_cols <- c("e10s", "e10s_state", "num_addons_nonsys", "num_addons_affected",
    "num_addons_medium_js", "num_addons_heavy_js", "has_vdh",
    grep("^sys", names(dt_good), value = TRUE))
## Collapse to one row per client: before/after differences in mean hang
## time and proportion of sessions with hangs, plus per-period counts.
## If a client has no sessions in one period, mean() over the empty subset
## is NaN, so the diff columns are NaN (filtered out downstream via is.na).
dt_good_prof <- dt_good[, c(list(
    hang_time_diff = mean(hang_time[period == "after"]) - mean(hang_time[period == "before"]),
    has_hangs_before = any(has_hangs[period == "before"]),
    has_hangs_after = any(has_hangs[period == "after"]),
    n_sess_before = sum(period == "before"),
    n_sess_after = sum(period == "after"),
    p_sess_hangs_before = mean(has_hangs[period == "before"]),
    p_sess_hangs_after = mean(has_hangs[period == "after"]),
    p_hangs_diff = mean(has_hangs[period == "after"]) - mean(has_hangs[period == "before"])),
    ## Take the first (constant within client) value of each per-client column.
    setNames(lapply(per_client_cols, function(ccol) { get(ccol)[1] }), per_client_cols)),
    by = client_id]
## A client "has hangs" if it hung in either period.
dt_good_prof[, has_hangs := has_hangs_before | has_hangs_after]

## Make boxplots of differences according to add-on types and e10s status, for profiles that had hangs.
## Classify each profile's add-on mix into one ordered factor level, from
## "no addons" up to "has VDH" (the most strongly affected case).
add_addon_type_col <- function(DT) {
    type_levels <- c("no addons", "addons,\nno affected",
        "has affected,\nno heavy-js", "has heavy-js", "has VDH")
    DT[, addon_type := {
        lbl <- ifelse(num_addons_nonsys == 0, type_levels[1],
            ifelse(num_addons_affected == 0, type_levels[2],
            ifelse(num_addons_heavy_js == 0, type_levels[3],
            ifelse(!has_vdh, type_levels[4], type_levels[5]))))
        factor(lbl, levels = type_levels)
    }]
}
add_addon_type_col(dt_good_prof)

## Boxplot of the per-client change in mean hang time by add-on type, with a
## reference line at zero (values below the line indicate improvement).
plot_diff_addon <- function(DT) {
    p <- ggplot(DT, aes(x = addon_type, y = hang_time_diff))
    p <- p + geom_boxplot()
    p <- p + geom_hline(yintercept = 0, size = 0.5, colour = "blue")
    p <- p + scale_y_continuous(breaks = interval.breaks(4))
    p + labs(title = "Change in client mean hang (runner.js) times\nfor clients with hangs (negative means improvement)",
        x = "Client's add-ons type",
        y = "Difference in log2(hang time)")
}
plot_diff_addon(dt_good_prof[has_hangs == TRUE]) + facet_wrap(~e10s_state)
## Warning: Removed 46 rows containing non-finite values (stat_boxplot).

## Boxplots of the difference in proportion of sessions with hangs.
ggplot(dt_good_prof[has_hangs == TRUE], aes(addon_type, p_hangs_diff)) +
    geom_boxplot() +
    facet_wrap(~e10s_state) +
    geom_hline(yintercept = 0, size = 0.5, colour = "blue") +
## Custom y breaks kept disabled; ggplot's default breaks are used.
#    scale_y_continuous(breaks = interval.breaks(4)) +
    labs(title = "Change in % sessions with hangs (runner.js)\nfor clients with hangs (negative means improvement)",
        x = "Client's add-ons type",
        y = "Difference in % sessions with hangs")
## Warning: Removed 46 rows containing non-finite values (stat_boxplot).

There is clear improvement in the mean hang time, for profiles that had any add-ons. The case for proportions of sessions with hangs is less conclusive.

What are the sample sizes for these groups?

## Per-group summary: the fraction and the count of clients with hangs.
dt_good_prop_hangs <- dt_good_prof[,
    list(has_hangs = mean(has_hangs), n_has_hangs = sum(has_hangs)),
    by = list(e10s_state, addon_type)]
## Bar chart of the client counts for each e10s/add-on-type group.
ggplot(dt_good_prop_hangs, aes(x = addon_type, y = n_has_hangs)) +
    geom_bar(stat = "identity") +
    facet_wrap(~e10s_state) +
    labs(title = "Number of clients with hangs (runner.js)\neither before or after",
        x = "Client's add-ons type",
        y = "Num clients")

What proportions do these represent?

## Also plot the corresponding proportions of clients with hangs.
plot_prop_addon <- function(DT) {
    p <- ggplot(DT, aes(x = addon_type, y = has_hangs)) +
        geom_bar(stat = "identity")
    p + scale_y_continuous(limits = c(0,1),
            breaks = interval.breaks(0.2), labels = pct.labels) +
        labs(title = "Percentage of clients with hangs (runner.js)\neither before or after",
            x = "Client's add-ons type",
            y = "Percent of clients")
}
plot_prop_addon(dt_good_prop_hangs) + facet_wrap(~e10s_state)

Effect size estimation for hangs

We now have evidence that the add-on SDK fix had a positive effect. We fit a linear model to compute effect sizes and test significance.

Recall that sessions with no hangs are counted in the per-client-period mean as 1, done primarily for convenience. Although this doesn’t fit naturally into the scale of the hang time measurements for actual hangs, we find that the additional variability this causes is nicely handled by using the change in proportion of sessions with hangs as a predictor.

## Flag clients that hung in both periods (vs. in only one).
dt_good_prof[, has_hangs_both := has_hangs_before & has_hangs_after]
## Modelling subset: clients with hangs and a well-defined difference.
dt_prof_hangs <- dt_good_prof[has_hangs == TRUE & !is.na(hang_time_diff)]
#dt_prof_hangs <- dt_good_prof[has_hangs_both == TRUE & !is.na(hang_time_diff)]
## Add-on indicator columns, one statement each for readability.
dt_prof_hangs[, has_addons := num_addons_nonsys > 0]
dt_prof_hangs[, has_affected_addons := num_addons_affected > 0]
dt_prof_hangs[, has_heavy_js_addons := num_addons_heavy_js > 0]
## The change in the number of sessions observed before and after.
dt_prof_hangs[, n_sess_diff := n_sess_after - n_sess_before]
## Combine OS and version (version is only kept for Windows).
dt_prof_hangs[, sys_os_with_ver := ifelse(sys_os == "Windows_NT", sprintf("Win %s", sys_os_version), sys_os)]

First try fitting a model with all relevant covariates.

## Full model: regress the per-client change in mean hang time on hang
## covariates (change in hang proportion, hangs in both periods, session
## count change), add-on covariates, and system covariates.
fit <- lm(hang_time_diff ~
    p_hangs_diff + has_hangs_both + n_sess_diff +
    e10s + has_addons + has_affected_addons + has_vdh +
    num_addons_nonsys + num_addons_affected +
    sys_os_with_ver + sys_arch + sys_cpu_count + sys_mem,
    data = dt_prof_hangs)
summary(fit)
## 
## Call:
## lm(formula = hang_time_diff ~ p_hangs_diff + has_hangs_both + 
##     n_sess_diff + e10s + has_addons + has_affected_addons + has_vdh + 
##     num_addons_nonsys + num_addons_affected + sys_os_with_ver + 
##     sys_arch + sys_cpu_count + sys_mem, data = dt_prof_hangs)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9840 -0.4277 -0.0079  0.4166  3.2070 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              4.590e-02  6.746e-02   0.680  0.49632    
## p_hangs_diff             7.731e+00  2.580e-02 299.667  < 2e-16 ***
## has_hangs_bothTRUE      -8.060e-01  2.811e-02 -28.674  < 2e-16 ***
## n_sess_diff              3.147e-05  3.971e-04   0.079  0.93682    
## e10sTRUE                 3.104e-02  2.924e-02   1.062  0.28833    
## has_addonsTRUE          -1.643e-01  5.339e-02  -3.077  0.00210 ** 
## has_affected_addonsTRUE -1.944e-01  2.777e-02  -7.001 2.81e-12 ***
## has_vdhTRUE             -4.159e-02  2.776e-02  -1.498  0.13408    
## num_addons_nonsys        9.999e-03  1.724e-03   5.799 7.00e-09 ***
## num_addons_affected     -3.425e-02  1.720e-02  -1.991  0.04654 *  
## sys_os_with_verLinux     1.564e-03  4.429e-02   0.035  0.97182    
## sys_os_with_verWin 10.0 -4.416e-03  3.225e-02  -0.137  0.89107    
## sys_os_with_verWin 5.1   2.227e-01  7.683e-02   2.898  0.00377 ** 
## sys_os_with_verWin 5.2   9.109e-01  6.461e-01   1.410  0.15865    
## sys_os_with_verWin 6.0   5.502e-02  1.826e-01   0.301  0.76314    
## sys_os_with_verWin 6.1  -6.267e-03  3.387e-02  -0.185  0.85323    
## sys_os_with_verWin 6.2   6.671e-02  7.489e-02   0.891  0.37309    
## sys_os_with_verWin 6.3   1.523e-02  4.158e-02   0.366  0.71414    
## sys_archx86-64          -3.004e-02  2.360e-02  -1.273  0.20303    
## sys_cpu_count           -3.183e-03  3.842e-03  -0.829  0.40731    
## sys_mem                 -2.239e-06  1.234e-06  -1.814  0.06972 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6442 on 6269 degrees of freedom
## Multiple R-squared:  0.947,  Adjusted R-squared:  0.9468 
## F-statistic:  5599 on 20 and 6269 DF,  p-value: < 2.2e-16
## Sequential (Type I) ANOVA of the fitted model; term order matters here.
anova(fit)
## Analysis of Variance Table
## 
## Response: hang_time_diff
##                       Df Sum Sq Mean Sq    F value    Pr(>F)    
## p_hangs_diff           1  45833   45833 1.1045e+05 < 2.2e-16 ***
## has_hangs_both         1    521     521 1.2560e+03 < 2.2e-16 ***
## n_sess_diff            1      0       0 1.6500e-02   0.89766    
## e10s                   1      0       0 1.7970e-01   0.67164    
## has_addons             1      8       8 1.9368e+01 1.096e-05 ***
## has_affected_addons    1     79      79 1.9099e+02 < 2.2e-16 ***
## has_vdh                1      2       2 3.8748e+00   0.04906 *  
## num_addons_nonsys      1     11      11 2.6168e+01 3.222e-07 ***
## num_addons_affected    1      1       1 3.5571e+00   0.05934 .  
## sys_os_with_ver        8      9       1 2.7432e+00   0.00507 ** 
## sys_arch               1      1       1 3.1842e+00   0.07440 .  
## sys_cpu_count          1      2       2 5.4957e+00   0.01909 *  
## sys_mem                1      1       1 3.2907e+00   0.06972 .  
## Residuals           6269   2602       0                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Note that e10s status doesn’t affect the difference in hang times. Also, the combined OS/version covariate is significant primarily because it picks up Windows XP. Use a single XP indicator instead.

Refit a slimmer model.

## Indicator for Windows XP (NT version 5.1); the full model above showed
## OS/version mattered mainly through XP.
dt_prof_hangs[, on_win_xp := sys_os == "Windows_NT" & sys_os_version == "5.1"]
## Slimmer model: drop covariates that were not informative above
## (e10s, session-count change, arch, the full per-OS-version dummies).
fit <- lm(hang_time_diff ~ p_hangs_diff + has_hangs_both +
    has_addons + has_affected_addons + has_vdh +
    num_addons_nonsys + num_addons_affected +
    on_win_xp + sys_cpu_count + sys_mem,
    data = dt_prof_hangs)
summary(fit)
## 
## Call:
## lm(formula = hang_time_diff ~ p_hangs_diff + has_hangs_both + 
##     has_addons + has_affected_addons + has_vdh + num_addons_nonsys + 
##     num_addons_affected + on_win_xp + sys_cpu_count + sys_mem, 
##     data = dt_prof_hangs)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9724 -0.4302 -0.0071  0.4188  3.2009 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              5.449e-02  4.827e-02   1.129 0.259037    
## p_hangs_diff             7.732e+00  2.575e-02 300.207  < 2e-16 ***
## has_hangs_bothTRUE      -8.048e-01  2.809e-02 -28.652  < 2e-16 ***
## has_addonsTRUE          -1.650e-01  5.335e-02  -3.092 0.001997 ** 
## has_affected_addonsTRUE -1.968e-01  2.771e-02  -7.103 1.35e-12 ***
## has_vdhTRUE             -4.086e-02  2.770e-02  -1.475 0.140254    
## num_addons_nonsys        9.599e-03  1.644e-03   5.839 5.51e-09 ***
## num_addons_affected     -3.354e-02  1.712e-02  -1.958 0.050219 .  
## on_win_xpTRUE            2.458e-01  6.755e-02   3.639 0.000276 ***
## sys_cpu_count           -3.496e-03  3.826e-03  -0.914 0.360947    
## sys_mem                 -2.522e-06  1.220e-06  -2.068 0.038699 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.644 on 6279 degrees of freedom
## Multiple R-squared:  0.9469, Adjusted R-squared:  0.9468 
## F-statistic: 1.12e+04 on 10 and 6279 DF,  p-value: < 2.2e-16
## Sequential (Type I) ANOVA of the slimmer model.
anova(fit)
## Analysis of Variance Table
## 
## Response: hang_time_diff
##                       Df Sum Sq Mean Sq    F value    Pr(>F)    
## p_hangs_diff           1  45833   45833 1.1051e+05 < 2.2e-16 ***
## has_hangs_both         1    521     521 1.2567e+03 < 2.2e-16 ***
## has_addons             1      8       8 1.9468e+01 1.040e-05 ***
## has_affected_addons    1     79      79 1.9112e+02 < 2.2e-16 ***
## has_vdh                1      2       2 3.9027e+00  0.048251 *  
## num_addons_nonsys      1     11      11 2.5456e+01 4.653e-07 ***
## num_addons_affected    1      1       1 3.3183e+00  0.068560 .  
## on_win_xp              1      7       7 1.6830e+01 4.140e-05 ***
## sys_cpu_count          1      3       3 7.4019e+00  0.006533 ** 
## sys_mem                1      2       2 4.2758e+00  0.038699 *  
## Residuals           6279   2604       0                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Plot overall residuals.
## NOTE(review): this assumes lm() used every row of dt_prof_hangs (no NA
## drops), otherwise the residuals(fit) assignment would length-mismatch.
dt_prof_hangs[, resid := residuals(fit)][, fitted := fitted.values(fit)]
qplot(resid, data = dt_prof_hangs, geom = "histogram", bins = 40, fill = I("white"), colour = I("black")) +
    labs(title = "Histogram of residuals", x = "Residuals", y = "Count")

## qqPlot is called inside `[` for its plotting side effect; the value of
## the `[` call prints as NULL below.
dt_prof_hangs[, car::qqPlot(resid, main = "Normal QQ plot of residuals")]

## NULL
qplot(fitted, resid, data = dt_prof_hangs) +
    geom_hline(yintercept = 0) +
    labs(title = "Residuals vs. fitted values", x = "Fitted", y = "Residuals")

## Split plots by add-on type.
qplot(resid, data = dt_prof_hangs, geom = "histogram", bins = 40, fill = I("white"), colour = I("black")) +
    labs(title = "Histogram of residuals by add-on type", x = "Residuals", y = "Count") +
    facet_wrap(~ addon_type, scales = "free_y")

qplot(fitted, resid, data = dt_prof_hangs) +
    geom_hline(yintercept = 0) +
    labs(title = "Residuals vs. fitted values by add-on type", x = "Fitted", y = "Residuals") +
    facet_wrap(~ addon_type)


(In progress)

Startup times

## Only consider sessions where firstpaint came after XPI bootstrapping start.
dt_startup <- dt_good[startup_XPIstart > 0 & startup_firstpaint > 0 &
    startup_XPIstart < startup_firstpaint]
## Natural log of the add-on-loading portion of startup (ms from XPI
## bootstrap start to firstpaint); note this is log(), not the log2 used
## for hang times.
dt_startup[, startup_affected := log(startup_firstpaint - startup_XPIstart + 1)]
## Boxplots, truncating above the 99th percentile to keep the scale
## readable; the commented-out data argument is the untruncated version.
qplot(period, startup_affected,# data = dt_startup,
    data = dt_startup[startup_affected < quantile(startup_affected, 0.99)],
    geom = "boxplot") +
    facet_grid(e10s ~ addon_state) +
    labs(title = "Session startup times (XPI bootstrap start to firstpaint) - truncated",
        x = "Period",
        y = "log(firstpaint - XPI_bootstrap_start)")

## Collapse startup data to one row per client.
dt_startup_prof <- dt_startup[, c(list(
    both_periods = length(unique(period)) == 2,
    has_hangs = any(has_hangs),
    ## Before/after change in the median per-session startup measure.
    startup_diff = median(startup_affected[period == "after"]) - median(startup_affected[period == "before"])),
    ## Keep a single value of each constant per-client column.
    setNames(lapply(per_client_cols, function(ccol) { get(ccol)[1] }), per_client_cols)),
    by = client_id]
## Require activity in both periods (otherwise startup_diff is NA).
dt_startup_prof <- dt_startup_prof[both_periods == TRUE]
add_addon_type_col(dt_startup_prof)
## Flag the extreme 1% tails of the difference for exclusion from plots.
dt_startup_prof[, extreme := startup_diff > quantile(startup_diff, 0.99) |
    startup_diff < quantile(startup_diff, 0.01)]
dt_startup_prof[, has_addons := num_addons_nonsys > 0][,
    has_affected_addons := num_addons_affected > 0][,
    has_medium_js_addons := num_addons_medium_js > 0][,
    has_heavy_js_addons := num_addons_heavy_js > 0]
## Combine OS and version (version is only kept for Windows).
dt_startup_prof[, sys_os_with_ver := ifelse(sys_os == "Windows_NT", sprintf("Win %s", sys_os_version), sys_os)]

## Boxplots of per-client change in median startup time by add-on type,
## excluding the extreme 1% tails flagged above. Use ggplot() directly,
## consistent with the rest of this file (qplot() is deprecated in ggplot2).
ggplot(dt_startup_prof[extreme == FALSE], aes(addon_type, startup_diff)) +
    geom_boxplot() +
    geom_hline(yintercept = 0, size = 0.5, colour = "blue") +
    facet_wrap(~e10s_state) +
    labs(title = "Change in median startup times (XPI bootstrap start to firstpaint) - truncated",
        x = "Client's add-ons type",
        y = "Difference in log(firstpaint - XPI_bootstrap_start)")

## Startup-difference model. The commented-out covariates were tried and
## dropped; only the uncommented terms enter the fit.
fit <- lm(startup_diff ~
    has_hangs +
    #e10s +
    has_addons +
    #has_affected_addons +
    #has_medium_js_addons +
    #has_heavy_js_addons +
    #has_vdh +
    #num_addons_nonsys +
    num_addons_affected +
    #num_addons_medium_js +
    #num_addons_heavy_js +
    sys_os_with_ver
    #sys_arch +
    #sys_cpu_count +
    #sys_mem,
    ,data = dt_startup_prof)
summary(fit)
## 
## Call:
## lm(formula = startup_diff ~ has_hangs + has_addons + num_addons_affected + 
##     sys_os_with_ver, data = dt_startup_prof)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.2266 -0.2120  0.0089  0.2118  8.2825 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.0629672  0.0328780   1.915 0.055494 .  
## has_hangsTRUE           -0.0764219  0.0114019  -6.703 2.14e-11 ***
## has_addonsTRUE          -0.0942235  0.0253663  -3.715 0.000205 ***
## num_addons_affected     -0.0265653  0.0081925  -3.243 0.001188 ** 
## sys_os_with_verLinux     0.0423748  0.0310543   1.365 0.172424    
## sys_os_with_verWin 10.0 -0.0462795  0.0220901  -2.095 0.036189 *  
## sys_os_with_verWin 5.1  -0.0009702  0.0433286  -0.022 0.982137    
## sys_os_with_verWin 5.2  -0.6879331  0.3890185  -1.768 0.077023 .  
## sys_os_with_verWin 6.0   0.0328637  0.0993361   0.331 0.740776    
## sys_os_with_verWin 6.1  -0.0477755  0.0225644  -2.117 0.034256 *  
## sys_os_with_verWin 6.2  -0.0973456  0.0458391  -2.124 0.033721 *  
## sys_os_with_verWin 6.3  -0.0682090  0.0269601  -2.530 0.011419 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5494 on 11802 degrees of freedom
## Multiple R-squared:  0.01104,    Adjusted R-squared:  0.01012 
## F-statistic: 11.98 on 11 and 11802 DF,  p-value: < 2.2e-16
## Sequential (Type I) ANOVA of the startup model.
anova(fit)
## Analysis of Variance Table
## 
## Response: startup_diff
##                        Df Sum Sq Mean Sq F value    Pr(>F)    
## has_hangs               1   24.4 24.3515 80.6883 < 2.2e-16 ***
## has_addons              1    4.5  4.5001 14.9112 0.0001133 ***
## num_addons_affected     1    2.9  2.8685  9.5047 0.0020542 ** 
## sys_os_with_ver         8    8.0  1.0045  3.3283 0.0008275 ***
## Residuals           11802 3561.8  0.3018                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Attach residuals and fitted values for diagnostics.
## NOTE(review): assumes lm() used every row of dt_startup_prof (no NA
## drops), otherwise residuals(fit) would length-mismatch — confirm.
dt_startup_prof[, resid := residuals(fit)][, fitted := fitted.values(fit)]
## Use ggplot() directly rather than the deprecated qplot() wrapper, for
## consistency with the plotting style used elsewhere in this file.
ggplot(dt_startup_prof, aes(resid)) +
    geom_histogram(bins = 100, fill = "white", colour = "black") +
    labs(title = "Histogram of residuals", x = "Residuals", y = "Count")

## qqPlot is called for its plotting side effect; the `[` call's value
## prints as NULL below.
dt_startup_prof[, car::qqPlot(resid, main = "Normal QQ plot of residuals")]

## NULL
ggplot(dt_startup_prof, aes(fitted, resid)) +
    geom_point() +
    geom_hline(yintercept = 0) +
    labs(title = "Residuals vs. fitted values", x = "Fitted", y = "Residuals")